{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Importing multi-sample VCF (Illumina-style) into Hail\n",
    "\n",
    "This tutorial is written for [Hail v0.2](https://hail.is/docs/devel/index.html)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Running on Apache Spark version 2.2.0\n",
      "SparkUI available at http://10.46.229.155:4040\n",
      "Welcome to\n",
      "     __  __     <>__\n",
      "    / /_/ /__  __/ /\n",
      "   / __  / _ `/ / /\n",
      "  /_/ /_/\\_,_/_/_/   version devel-2610434c191b\n",
      "NOTE: This is a beta version. Interfaces may change\n",
      "  during the beta period. We recommend pulling\n",
      "  the latest changes weekly.\n"
     ]
    }
   ],
   "source": [
    "import hail as hl\n",
    "import hail.expr.aggregators as agg\n",
    "hl.init()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Import block-compressed VCF, split multi-allelic sites and perform variant + sample QC."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2018-09-21 11:22:32 Hail: INFO: Coerced almost-sorted dataset\n",
      "2018-09-21 11:23:12 Hail: INFO: Coerced almost-sorted dataset\n"
     ]
    }
   ],
   "source": [
    "vcf='polaris.chr20.hail.vcf.bgz'\n",
    "vt = hl.import_vcf(vcf)\n",
    "vt = hl.split_multi_hts(vt)\n",
    "vt = hl.variant_qc(vt)\n",
    "vt = hl.sample_qc(vt).cache()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Have 984253 samples and 148 variants\n",
      "+---------------+------------+\n",
      "| locus         | alleles    |\n",
      "+---------------+------------+\n",
      "| locus<GRCh37> | array<str> |\n",
      "+---------------+------------+\n",
      "| 20:60006      | [\"A\",\"C\"]  |\n",
      "| 20:60008      | [\"A\",\"C\"]  |\n",
      "| 20:60020      | [\"A\",\"C\"]  |\n",
      "| 20:60024      | [\"A\",\"G\"]  |\n",
      "| 20:60053      | [\"G\",\"T\"]  |\n",
      "+---------------+------------+\n",
      "showing top 5 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "cnt = vt.count()\n",
    "print(\"Have {} samples and {} variants\".format(cnt[0],cnt[1]))\n",
    "vt.rows().select().show(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Variant and sample QC create additional row and column fields, respectively. Examples include the call rate (the fraction of non-missing calls for a variant across samples) and the ratio of transitions to transversions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "    <div class=\"bk-root\">\n",
       "        <a href=\"https://bokeh.pydata.org\" target=\"_blank\" class=\"bk-logo bk-logo-small bk-logo-notebook\"></a>\n",
       "        <span id=\"6e1f5720-b2a7-492f-9c6c-e1b6cb9b74bc\">Loading BokehJS ...</span>\n",
       "    </div>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/javascript": [
       "\n",
       "(function(root) {\n",
       "  function now() {\n",
       "    return new Date();\n",
       "  }\n",
       "\n",
       "  var force = true;\n",
       "\n",
       "  if (typeof (root._bokeh_onload_callbacks) === \"undefined\" || force === true) {\n",
       "    root._bokeh_onload_callbacks = [];\n",
       "    root._bokeh_is_loading = undefined;\n",
       "  }\n",
       "\n",
       "  var JS_MIME_TYPE = 'application/javascript';\n",
       "  var HTML_MIME_TYPE = 'text/html';\n",
       "  var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n",
       "  var CLASS_NAME = 'output_bokeh rendered_html';\n",
       "\n",
       "  /**\n",
       "   * Render data to the DOM node\n",
       "   */\n",
       "  function render(props, node) {\n",
       "    var script = document.createElement(\"script\");\n",
       "    node.appendChild(script);\n",
       "  }\n",
       "\n",
       "  /**\n",
       "   * Handle when an output is cleared or removed\n",
       "   */\n",
       "  function handleClearOutput(event, handle) {\n",
       "    var cell = handle.cell;\n",
       "\n",
       "    var id = cell.output_area._bokeh_element_id;\n",
       "    var server_id = cell.output_area._bokeh_server_id;\n",
       "    // Clean up Bokeh references\n",
       "    if (id !== undefined) {\n",
       "      Bokeh.index[id].model.document.clear();\n",
       "      delete Bokeh.index[id];\n",
       "    }\n",
       "\n",
       "    if (server_id !== undefined) {\n",
       "      // Clean up Bokeh references\n",
       "      var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n",
       "      cell.notebook.kernel.execute(cmd, {\n",
       "        iopub: {\n",
       "          output: function(msg) {\n",
       "            var element_id = msg.content.text.trim();\n",
       "            Bokeh.index[element_id].model.document.clear();\n",
       "            delete Bokeh.index[element_id];\n",
       "          }\n",
       "        }\n",
       "      });\n",
       "      // Destroy server and session\n",
       "      var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n",
       "      cell.notebook.kernel.execute(cmd);\n",
       "    }\n",
       "  }\n",
       "\n",
       "  /**\n",
       "   * Handle when a new output is added\n",
       "   */\n",
       "  function handleAddOutput(event, handle) {\n",
       "    var output_area = handle.output_area;\n",
       "    var output = handle.output;\n",
       "\n",
       "    // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n",
       "    if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n",
       "      return\n",
       "    }\n",
       "\n",
       "    var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n",
       "\n",
       "    if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n",
       "      toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n",
       "      // store reference to embed id on output_area\n",
       "      output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n",
       "    }\n",
       "    if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n",
       "      var bk_div = document.createElement(\"div\");\n",
       "      bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n",
       "      var script_attrs = bk_div.children[0].attributes;\n",
       "      for (var i = 0; i < script_attrs.length; i++) {\n",
       "        toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n",
       "      }\n",
       "      // store reference to server id on output_area\n",
       "      output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n",
       "    }\n",
       "  }\n",
       "\n",
       "  function register_renderer(events, OutputArea) {\n",
       "\n",
       "    function append_mime(data, metadata, element) {\n",
       "      // create a DOM node to render to\n",
       "      var toinsert = this.create_output_subarea(\n",
       "        metadata,\n",
       "        CLASS_NAME,\n",
       "        EXEC_MIME_TYPE\n",
       "      );\n",
       "      this.keyboard_manager.register_events(toinsert);\n",
       "      // Render to node\n",
       "      var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n",
       "      render(props, toinsert[toinsert.length - 1]);\n",
       "      element.append(toinsert);\n",
       "      return toinsert\n",
       "    }\n",
       "\n",
       "    /* Handle when an output is cleared or removed */\n",
       "    events.on('clear_output.CodeCell', handleClearOutput);\n",
       "    events.on('delete.Cell', handleClearOutput);\n",
       "\n",
       "    /* Handle when a new output is added */\n",
       "    events.on('output_added.OutputArea', handleAddOutput);\n",
       "\n",
       "    /**\n",
       "     * Register the mime type and append_mime function with output_area\n",
       "     */\n",
       "    OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n",
       "      /* Is output safe? */\n",
       "      safe: true,\n",
       "      /* Index of renderer in `output_area.display_order` */\n",
       "      index: 0\n",
       "    });\n",
       "  }\n",
       "\n",
       "  // register the mime type if in Jupyter Notebook environment and previously unregistered\n",
       "  if (root.Jupyter !== undefined) {\n",
       "    var events = require('base/js/events');\n",
       "    var OutputArea = require('notebook/js/outputarea').OutputArea;\n",
       "\n",
       "    if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n",
       "      register_renderer(events, OutputArea);\n",
       "    }\n",
       "  }\n",
       "\n",
       "  \n",
       "  if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n",
       "    root._bokeh_timeout = Date.now() + 5000;\n",
       "    root._bokeh_failed_load = false;\n",
       "  }\n",
       "\n",
       "  var NB_LOAD_WARNING = {'data': {'text/html':\n",
       "     \"<div style='background-color: #fdd'>\\n\"+\n",
       "     \"<p>\\n\"+\n",
       "     \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n",
       "     \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n",
       "     \"</p>\\n\"+\n",
       "     \"<ul>\\n\"+\n",
       "     \"<li>re-rerun `output_notebook()` to attempt to load from CDN again, or</li>\\n\"+\n",
       "     \"<li>use INLINE resources instead, as so:</li>\\n\"+\n",
       "     \"</ul>\\n\"+\n",
       "     \"<code>\\n\"+\n",
       "     \"from bokeh.resources import INLINE\\n\"+\n",
       "     \"output_notebook(resources=INLINE)\\n\"+\n",
       "     \"</code>\\n\"+\n",
       "     \"</div>\"}};\n",
       "\n",
       "  function display_loaded() {\n",
       "    var el = document.getElementById(\"6e1f5720-b2a7-492f-9c6c-e1b6cb9b74bc\");\n",
       "    if (el != null) {\n",
       "      el.textContent = \"BokehJS is loading...\";\n",
       "    }\n",
       "    if (root.Bokeh !== undefined) {\n",
       "      if (el != null) {\n",
       "        el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n",
       "      }\n",
       "    } else if (Date.now() < root._bokeh_timeout) {\n",
       "      setTimeout(display_loaded, 100)\n",
       "    }\n",
       "  }\n",
       "\n",
       "\n",
       "  function run_callbacks() {\n",
       "    try {\n",
       "      root._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n",
       "    }\n",
       "    finally {\n",
       "      delete root._bokeh_onload_callbacks\n",
       "    }\n",
       "    console.info(\"Bokeh: all callbacks have finished\");\n",
       "  }\n",
       "\n",
       "  function load_libs(js_urls, callback) {\n",
       "    root._bokeh_onload_callbacks.push(callback);\n",
       "    if (root._bokeh_is_loading > 0) {\n",
       "      console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n",
       "      return null;\n",
       "    }\n",
       "    if (js_urls == null || js_urls.length === 0) {\n",
       "      run_callbacks();\n",
       "      return null;\n",
       "    }\n",
       "    console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n",
       "    root._bokeh_is_loading = js_urls.length;\n",
       "    for (var i = 0; i < js_urls.length; i++) {\n",
       "      var url = js_urls[i];\n",
       "      var s = document.createElement('script');\n",
       "      s.src = url;\n",
       "      s.async = false;\n",
       "      s.onreadystatechange = s.onload = function() {\n",
       "        root._bokeh_is_loading--;\n",
       "        if (root._bokeh_is_loading === 0) {\n",
       "          console.log(\"Bokeh: all BokehJS libraries loaded\");\n",
       "          run_callbacks()\n",
       "        }\n",
       "      };\n",
       "      s.onerror = function() {\n",
       "        console.warn(\"failed to load library \" + url);\n",
       "      };\n",
       "      console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n",
       "      document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "    }\n",
       "  };var element = document.getElementById(\"6e1f5720-b2a7-492f-9c6c-e1b6cb9b74bc\");\n",
       "  if (element == null) {\n",
       "    console.log(\"Bokeh: ERROR: autoload.js configured with elementid '6e1f5720-b2a7-492f-9c6c-e1b6cb9b74bc' but no matching script tag was found. \")\n",
       "    return false;\n",
       "  }\n",
       "\n",
       "  var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.16.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.16.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.16.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-0.12.16.min.js\"];\n",
       "\n",
       "  var inline_js = [\n",
       "    function(Bokeh) {\n",
       "      Bokeh.set_log_level(\"info\");\n",
       "    },\n",
       "    \n",
       "    function(Bokeh) {\n",
       "      \n",
       "    },\n",
       "    function(Bokeh) {\n",
       "      console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.16.min.css\");\n",
       "      Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.16.min.css\");\n",
       "      console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.16.min.css\");\n",
       "      Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.16.min.css\");\n",
       "      console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.16.min.css\");\n",
       "      Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.16.min.css\");\n",
       "    }\n",
       "  ];\n",
       "\n",
       "  function run_inline_js() {\n",
       "    \n",
       "    if ((root.Bokeh !== undefined) || (force === true)) {\n",
       "      for (var i = 0; i < inline_js.length; i++) {\n",
       "        inline_js[i].call(root, root.Bokeh);\n",
       "      }if (force === true) {\n",
       "        display_loaded();\n",
       "      }} else if (Date.now() < root._bokeh_timeout) {\n",
       "      setTimeout(run_inline_js, 100);\n",
       "    } else if (!root._bokeh_failed_load) {\n",
       "      console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n",
       "      root._bokeh_failed_load = true;\n",
       "    } else if (force !== true) {\n",
       "      var cell = $(document.getElementById(\"6e1f5720-b2a7-492f-9c6c-e1b6cb9b74bc\")).parents('.cell').data().cell;\n",
       "      cell.output_area.append_execute_result(NB_LOAD_WARNING)\n",
       "    }\n",
       "\n",
       "  }\n",
       "\n",
       "  if (root._bokeh_is_loading === 0) {\n",
       "    console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n",
       "    run_inline_js();\n",
       "  } else {\n",
       "    load_libs(js_urls, function() {\n",
       "      console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n",
       "      run_inline_js();\n",
       "    });\n",
       "  }\n",
       "}(window));"
      ],
      "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n  function now() {\n    return new Date();\n  }\n\n  var force = true;\n\n  if (typeof (root._bokeh_onload_callbacks) === \"undefined\" || force === true) {\n    root._bokeh_onload_callbacks = [];\n    root._bokeh_is_loading = undefined;\n  }\n\n  \n\n  \n  if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n    root._bokeh_timeout = Date.now() + 5000;\n    root._bokeh_failed_load = false;\n  }\n\n  var NB_LOAD_WARNING = {'data': {'text/html':\n     \"<div style='background-color: #fdd'>\\n\"+\n     \"<p>\\n\"+\n     \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n     \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n     \"</p>\\n\"+\n     \"<ul>\\n\"+\n     \"<li>re-rerun `output_notebook()` to attempt to load from CDN again, or</li>\\n\"+\n     \"<li>use INLINE resources instead, as so:</li>\\n\"+\n     \"</ul>\\n\"+\n     \"<code>\\n\"+\n     \"from bokeh.resources import INLINE\\n\"+\n     \"output_notebook(resources=INLINE)\\n\"+\n     \"</code>\\n\"+\n     \"</div>\"}};\n\n  function display_loaded() {\n    var el = document.getElementById(\"6e1f5720-b2a7-492f-9c6c-e1b6cb9b74bc\");\n    if (el != null) {\n      el.textContent = \"BokehJS is loading...\";\n    }\n    if (root.Bokeh !== undefined) {\n      if (el != null) {\n        el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n      }\n    } else if (Date.now() < root._bokeh_timeout) {\n      setTimeout(display_loaded, 100)\n    }\n  }\n\n\n  function run_callbacks() {\n    try {\n      root._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n    }\n    finally {\n      delete root._bokeh_onload_callbacks\n    }\n    console.info(\"Bokeh: all callbacks have finished\");\n  }\n\n  function load_libs(js_urls, callback) {\n    root._bokeh_onload_callbacks.push(callback);\n    if (root._bokeh_is_loading 
> 0) {\n      console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n      return null;\n    }\n    if (js_urls == null || js_urls.length === 0) {\n      run_callbacks();\n      return null;\n    }\n    console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n    root._bokeh_is_loading = js_urls.length;\n    for (var i = 0; i < js_urls.length; i++) {\n      var url = js_urls[i];\n      var s = document.createElement('script');\n      s.src = url;\n      s.async = false;\n      s.onreadystatechange = s.onload = function() {\n        root._bokeh_is_loading--;\n        if (root._bokeh_is_loading === 0) {\n          console.log(\"Bokeh: all BokehJS libraries loaded\");\n          run_callbacks()\n        }\n      };\n      s.onerror = function() {\n        console.warn(\"failed to load library \" + url);\n      };\n      console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n      document.getElementsByTagName(\"head\")[0].appendChild(s);\n    }\n  };var element = document.getElementById(\"6e1f5720-b2a7-492f-9c6c-e1b6cb9b74bc\");\n  if (element == null) {\n    console.log(\"Bokeh: ERROR: autoload.js configured with elementid '6e1f5720-b2a7-492f-9c6c-e1b6cb9b74bc' but no matching script tag was found. 
\")\n    return false;\n  }\n\n  var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.16.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.16.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.16.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-0.12.16.min.js\"];\n\n  var inline_js = [\n    function(Bokeh) {\n      Bokeh.set_log_level(\"info\");\n    },\n    \n    function(Bokeh) {\n      \n    },\n    function(Bokeh) {\n      console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.16.min.css\");\n      Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.16.min.css\");\n      console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.16.min.css\");\n      Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.16.min.css\");\n      console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.16.min.css\");\n      Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.16.min.css\");\n    }\n  ];\n\n  function run_inline_js() {\n    \n    if ((root.Bokeh !== undefined) || (force === true)) {\n      for (var i = 0; i < inline_js.length; i++) {\n        inline_js[i].call(root, root.Bokeh);\n      }if (force === true) {\n        display_loaded();\n      }} else if (Date.now() < root._bokeh_timeout) {\n      setTimeout(run_inline_js, 100);\n    } else if (!root._bokeh_failed_load) {\n      console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n      root._bokeh_failed_load = true;\n    } else if (force !== true) {\n      var cell = $(document.getElementById(\"6e1f5720-b2a7-492f-9c6c-e1b6cb9b74bc\")).parents('.cell').data().cell;\n      cell.output_area.append_execute_result(NB_LOAD_WARNING)\n    }\n\n  }\n\n  if (root._bokeh_is_loading === 0) {\n    console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n    
run_inline_js();\n  } else {\n    load_libs(js_urls, function() {\n      console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n      run_inline_js();\n    });\n  }\n}(window));"
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from pprint import pprint\n",
    "from bokeh.io import output_notebook, show\n",
    "from bokeh.layouts import gridplot\n",
    "from bokeh.models import Span\n",
    "output_notebook()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Plot some metrics such as the call rate (fraction of non-missing calls), the transition/transversion (Ti/Tv) ratio and the Het/Hom ratio."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Call rate mean=0.989\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "<div class=\"bk-root\">\n",
       "    <div class=\"bk-plotdiv\" id=\"bca2b1bd-a133-49a7-99ed-3354812e87a0\"></div>\n",
       "</div>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/javascript": [
       "(function(root) {\n",
       "  function embed_document(root) {\n",
       "    \n",
       "  var docs_json = {\"53372f0f-e2c2-4261-8412-ba1dc3994006\":{\"roots\":{\"references\":[{\"attributes\":{\"label\":{\"value\":\"Call Rate\"},\"renderers\":[{\"id\":\"76a3cb88-cf21-44b5-a2cc-51bceff474c4\",\"type\":\"GlyphRenderer\"}]},\"id\":\"9c36cb73-0997-48a4-bd4e-7812be75249d\",\"type\":\"LegendItem\"},{\"attributes\":{},\"id\":\"8dfe8545-3651-4e90-bd0a-0af0b94faf16\",\"type\":\"LinearScale\"},{\"attributes\":{},\"id\":\"c7afde27-b4b3-4576-80c8-ec7162da64cf\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{\"bottom_units\":\"screen\",\"fill_alpha\":{\"value\":0.5},\"fill_color\":{\"value\":\"lightgrey\"},\"left_units\":\"screen\",\"level\":\"overlay\",\"line_alpha\":{\"value\":1.0},\"line_color\":{\"value\":\"black\"},\"line_dash\":[4,4],\"line_width\":{\"value\":2},\"plot\":null,\"render_mode\":\"css\",\"right_units\":\"screen\",\"top_units\":\"screen\"},\"id\":\"db814e02-86ee-4dc9-a552-4095bc1242a5\",\"type\":\"BoxAnnotation\"},{\"attributes\":{},\"id\":\"46e271ed-c634-4824-9603-168e7fbcfdb7\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{\"source\":{\"id\":\"2f7f2606-88ac-4fc6-908f-1cd9abcbd339\",\"type\":\"ColumnDataSource\"}},\"id\":\"2ee02bcd-a4af-43a8-875f-ee0761489a0e\",\"type\":\"CDSView\"},{\"attributes\":{},\"id\":\"deb5cdc4-e711-4938-8549-ba33256c80ea\",\"type\":\"UnionRenderers\"},{\"attributes\":{\"callback\":null,\"data\":{\"left\":[0.0,0.03333333333333333,0.06666666666666667,0.1,0.13333333333333333,0.16666666666666666,0.2,0.23333333333333334,0.26666666666666666,0.3,0.3333333333333333,0.36666666666666664,0.4,0.43333333333333335,0.4666666666666667,0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333333,0.7666666666666666,0.8,0.8333333333333334,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667],\"right\":[0.03333333333333333,0.06666666666666667,0.1,0.13333333333333333,0.16666666666666666,0.2,0.23333333333333334,0.26666666666666666,0.3,0.3333333333333333,0.36666666666666664,0
.4,0.43333333333333335,0.4666666666666667,0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333333,0.7666666666666666,0.8,0.8333333333333334,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0],\"top\":[329,256,206,236,209,224,236,227,342,574,560,544,685,756,683,1005,785,976,1265,1305,1525,1768,1917,2484,2942,3182,4612,6139,10970,937311]},\"selected\":{\"id\":\"400c4c64-8de6-4c28-9bed-73f9daaa3654\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"deb5cdc4-e711-4938-8549-ba33256c80ea\",\"type\":\"UnionRenderers\"}},\"id\":\"2f7f2606-88ac-4fc6-908f-1cd9abcbd339\",\"type\":\"ColumnDataSource\"},{\"attributes\":{},\"id\":\"400c4c64-8de6-4c28-9bed-73f9daaa3654\",\"type\":\"Selection\"},{\"attributes\":{},\"id\":\"942cfb41-c251-48b7-84f4-e7053789b88a\",\"type\":\"HelpTool\"},{\"attributes\":{},\"id\":\"9c309260-cc4d-4df0-9c72-48fe5f0c2d07\",\"type\":\"ResetTool\"},{\"attributes\":{\"callback\":null},\"id\":\"4a24b7b9-b5e4-410f-86f3-ff1e1d92b535\",\"type\":\"DataRange1d\"},{\"attributes\":{\"bottom\":{\"value\":0},\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"left\":{\"field\":\"left\"},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"right\":{\"field\":\"right\"},\"top\":{\"field\":\"top\"}},\"id\":\"81ed34a0-b245-4433-abba-ac2c86276118\",\"type\":\"Quad\"},{\"attributes\":{\"axis_label\":\"Frequency\",\"formatter\":{\"id\":\"46e271ed-c634-4824-9603-168e7fbcfdb7\",\"type\":\"BasicTickFormatter\"},\"plot\":{\"id\":\"3ddf6291-10a7-49b5-9cdb-f83ab2274943\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"03d4ba54-1d9f-4a9e-be85-012b559eba78\",\"type\":\"BasicTicker\"}},\"id\":\"06d9f368-63fc-4a75-9c33-da4379b7206f\",\"type\":\"LinearAxis\"},{\"attributes\":{\"overlay\":{\"id\":\"db814e02-86ee-4dc9-a552-4095bc1242a5\",\"type\":\"BoxAnnotation\"}},\"id\":\"3b593625-bd41-4ffc-9dc7-687018427dec\",\"type\":\"BoxZoomTool\"},{\"attributes\":{\"items\"
:[{\"id\":\"9c36cb73-0997-48a4-bd4e-7812be75249d\",\"type\":\"LegendItem\"}],\"plot\":{\"id\":\"3ddf6291-10a7-49b5-9cdb-f83ab2274943\",\"subtype\":\"Figure\",\"type\":\"Plot\"}},\"id\":\"3a43c2fa-da7d-4f86-b9fa-90e433e11fb9\",\"type\":\"Legend\"},{\"attributes\":{},\"id\":\"ad87743a-4912-4db3-967d-39b3c31c1589\",\"type\":\"WheelZoomTool\"},{\"attributes\":{\"background_fill_color\":{\"value\":\"#EEEEEE\"},\"below\":[{\"id\":\"af274baf-8d88-44bb-a25f-f50aacea8eeb\",\"type\":\"LinearAxis\"}],\"left\":[{\"id\":\"06d9f368-63fc-4a75-9c33-da4379b7206f\",\"type\":\"LinearAxis\"}],\"renderers\":[{\"id\":\"af274baf-8d88-44bb-a25f-f50aacea8eeb\",\"type\":\"LinearAxis\"},{\"id\":\"66aa862f-caa8-4338-9b43-c2df2656c250\",\"type\":\"Grid\"},{\"id\":\"06d9f368-63fc-4a75-9c33-da4379b7206f\",\"type\":\"LinearAxis\"},{\"id\":\"fc37cc78-5ebf-4229-a0bf-c8d8ad61bf9d\",\"type\":\"Grid\"},{\"id\":\"db814e02-86ee-4dc9-a552-4095bc1242a5\",\"type\":\"BoxAnnotation\"},{\"id\":\"3a43c2fa-da7d-4f86-b9fa-90e433e11fb9\",\"type\":\"Legend\"},{\"id\":\"76a3cb88-cf21-44b5-a2cc-51bceff474c4\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"c2ec505a-31d6-4221-a002-86e0ab9976d3\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"2b659c51-3f81-483e-b27f-d398fd654ef8\",\"type\":\"Toolbar\"},\"x_range\":{\"id\":\"e68289e5-3fde-4852-af55-c0c7b3c3e3d2\",\"type\":\"DataRange1d\"},\"x_scale\":{\"id\":\"8dfe8545-3651-4e90-bd0a-0af0b94faf16\",\"type\":\"LinearScale\"},\"y_range\":{\"id\":\"4a24b7b9-b5e4-410f-86f3-ff1e1d92b535\",\"type\":\"DataRange1d\"},\"y_scale\":{\"id\":\"95fccfe1-2a85-4433-b416-dbcbccb07dd5\",\"type\":\"LinearScale\"}},\"id\":\"3ddf6291-10a7-49b5-9cdb-f83ab2274943\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{\"axis_label\":\"Call 
Rate\",\"formatter\":{\"id\":\"c7afde27-b4b3-4576-80c8-ec7162da64cf\",\"type\":\"BasicTickFormatter\"},\"plot\":{\"id\":\"3ddf6291-10a7-49b5-9cdb-f83ab2274943\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"06ea3d48-6dc0-48bd-b1e5-3eaa744c50d9\",\"type\":\"BasicTicker\"}},\"id\":\"af274baf-8d88-44bb-a25f-f50aacea8eeb\",\"type\":\"LinearAxis\"},{\"attributes\":{\"callback\":null},\"id\":\"e68289e5-3fde-4852-af55-c0c7b3c3e3d2\",\"type\":\"DataRange1d\"},{\"attributes\":{},\"id\":\"7f39211b-1c2a-46aa-bc57-ece645d990ee\",\"type\":\"PanTool\"},{\"attributes\":{\"data_source\":{\"id\":\"2f7f2606-88ac-4fc6-908f-1cd9abcbd339\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"60fc95a1-3432-4f75-a967-3182b085ce22\",\"type\":\"Quad\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"81ed34a0-b245-4433-abba-ac2c86276118\",\"type\":\"Quad\"},\"selection_glyph\":null,\"view\":{\"id\":\"2ee02bcd-a4af-43a8-875f-ee0761489a0e\",\"type\":\"CDSView\"}},\"id\":\"76a3cb88-cf21-44b5-a2cc-51bceff474c4\",\"type\":\"GlyphRenderer\"},{\"attributes\":{},\"id\":\"06ea3d48-6dc0-48bd-b1e5-3eaa744c50d9\",\"type\":\"BasicTicker\"},{\"attributes\":{},\"id\":\"03d4ba54-1d9f-4a9e-be85-012b559eba78\",\"type\":\"BasicTicker\"},{\"attributes\":{\"dimension\":1,\"plot\":{\"id\":\"3ddf6291-10a7-49b5-9cdb-f83ab2274943\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"03d4ba54-1d9f-4a9e-be85-012b559eba78\",\"type\":\"BasicTicker\"}},\"id\":\"fc37cc78-5ebf-4229-a0bf-c8d8ad61bf9d\",\"type\":\"Grid\"},{\"attributes\":{\"plot\":null,\"text\":\"Call rate 
Histogram\"},\"id\":\"c2ec505a-31d6-4221-a002-86e0ab9976d3\",\"type\":\"Title\"},{\"attributes\":{},\"id\":\"d69d7e34-6d03-4528-b1f0-02fdce29e0ed\",\"type\":\"SaveTool\"},{\"attributes\":{\"bottom\":{\"value\":0},\"fill_color\":{\"value\":\"#1f77b4\"},\"left\":{\"field\":\"left\"},\"right\":{\"field\":\"right\"},\"top\":{\"field\":\"top\"}},\"id\":\"60fc95a1-3432-4f75-a967-3182b085ce22\",\"type\":\"Quad\"},{\"attributes\":{},\"id\":\"95fccfe1-2a85-4433-b416-dbcbccb07dd5\",\"type\":\"LinearScale\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"7f39211b-1c2a-46aa-bc57-ece645d990ee\",\"type\":\"PanTool\"},{\"id\":\"ad87743a-4912-4db3-967d-39b3c31c1589\",\"type\":\"WheelZoomTool\"},{\"id\":\"3b593625-bd41-4ffc-9dc7-687018427dec\",\"type\":\"BoxZoomTool\"},{\"id\":\"d69d7e34-6d03-4528-b1f0-02fdce29e0ed\",\"type\":\"SaveTool\"},{\"id\":\"9c309260-cc4d-4df0-9c72-48fe5f0c2d07\",\"type\":\"ResetTool\"},{\"id\":\"942cfb41-c251-48b7-84f4-e7053789b88a\",\"type\":\"HelpTool\"}]},\"id\":\"2b659c51-3f81-483e-b27f-d398fd654ef8\",\"type\":\"Toolbar\"},{\"attributes\":{\"plot\":{\"id\":\"3ddf6291-10a7-49b5-9cdb-f83ab2274943\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"06ea3d48-6dc0-48bd-b1e5-3eaa744c50d9\",\"type\":\"BasicTicker\"}},\"id\":\"66aa862f-caa8-4338-9b43-c2df2656c250\",\"type\":\"Grid\"}],\"root_ids\":[\"3ddf6291-10a7-49b5-9cdb-f83ab2274943\"]},\"title\":\"Bokeh Application\",\"version\":\"0.12.16\"}};\n",
       "  var render_items = [{\"docid\":\"53372f0f-e2c2-4261-8412-ba1dc3994006\",\"elementid\":\"bca2b1bd-a133-49a7-99ed-3354812e87a0\",\"modelid\":\"3ddf6291-10a7-49b5-9cdb-f83ab2274943\"}];\n",
       "  root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n",
       "\n",
       "  }\n",
       "  if (root.Bokeh !== undefined) {\n",
       "    embed_document(root);\n",
       "  } else {\n",
       "    var attempts = 0;\n",
       "    var timer = setInterval(function(root) {\n",
       "      if (root.Bokeh !== undefined) {\n",
       "        embed_document(root);\n",
       "        clearInterval(timer);\n",
       "      }\n",
       "      attempts++;\n",
       "      if (attempts > 100) {\n",
       "        console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\")\n",
       "        clearInterval(timer);\n",
       "      }\n",
       "    }, 10, root)\n",
       "  }\n",
       "})(window);"
      ],
      "application/vnd.bokehjs_exec.v0+json": ""
     },
     "metadata": {
      "application/vnd.bokehjs_exec.v0+json": {
       "id": "3ddf6291-10a7-49b5-9cdb-f83ab2274943"
      }
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "call_rate_mean = vt.aggregate_rows(agg.mean(vt.variant_qc.call_rate))\n",
    "print('Call rate mean=%.3f' % call_rate_mean)\n",
    "p = hl.plot.histogram(vt.variant_qc.call_rate, range=(0,1.0), bins=30, title='Call rate Histogram', legend='Call Rate')\n",
    "show(p)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Ti/Tv (sometimes called Ts/Tv): the ratio of transitions vs. transversions in SNPs. We expect this ratio to be close to 2.1 for whole-genome sequencing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Ti/Tv mean=2.054\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "<div class=\"bk-root\">\n",
       "    <div class=\"bk-plotdiv\" id=\"74d8bb7d-2f07-4f51-88a5-3fedcd9d5aee\"></div>\n",
       "</div>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/javascript": [
       "(function(root) {\n",
       "  function embed_document(root) {\n",
       "    \n",
       "  var docs_json = {\"d414b0e6-28f6-4936-aae8-a42fef7516ca\":{\"roots\":{\"references\":[{\"attributes\":{},\"id\":\"a5c3b736-a8c4-48ec-be2a-82a906c159cf\",\"type\":\"LinearScale\"},{\"attributes\":{\"label\":{\"value\":\"Ti/Tv\"},\"renderers\":[{\"id\":\"9e817dd4-e155-4780-b42b-c312e25d2d26\",\"type\":\"GlyphRenderer\"}]},\"id\":\"7ce9f7e2-1b77-4ab4-93ee-2209aef3ef13\",\"type\":\"LegendItem\"},{\"attributes\":{},\"id\":\"eb65afb6-1770-430a-9283-d05c3cf5a18f\",\"type\":\"SaveTool\"},{\"attributes\":{\"plot\":null,\"text\":\"Ti/Tv Histogram\"},\"id\":\"2238b99f-0f6b-41d2-8e3e-d7827c516cbe\",\"type\":\"Title\"},{\"attributes\":{},\"id\":\"4cba8f2b-28fe-47b7-84b8-a098aef4012b\",\"type\":\"Selection\"},{\"attributes\":{\"dimension\":1,\"plot\":{\"id\":\"b13004da-8c8b-4ed7-af8c-6a6e2aedbc1b\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"fd1460d4-ab3d-478d-968d-3651e0176208\",\"type\":\"BasicTicker\"}},\"id\":\"b5f485f5-aec8-482a-9b6a-08d5aa72436d\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"74c434bf-daf0-4e8b-b195-87fef4a17504\",\"type\":\"BasicTicker\"},{\"attributes\":{\"bottom\":{\"value\":0},\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"left\":{\"field\":\"left\"},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"right\":{\"field\":\"right\"},\"top\":{\"field\":\"top\"}},\"id\":\"05371486-bdd7-4ca6-aa83-1e8092126ab0\",\"type\":\"Quad\"},{\"attributes\":{},\"id\":\"fd1460d4-ab3d-478d-968d-3651e0176208\",\"type\":\"BasicTicker\"},{\"attributes\":{\"background_fill_color\":{\"value\":\"#EEEEEE\"},\"below\":[{\"id\":\"18f75ed8-b10a-4f84-9f7e-60e8cd57012d\",\"type\":\"LinearAxis\"}],\"left\":[{\"id\":\"4c7eab41-d18c-49cb-9fcc-064ba4ef8408\",\"type\":\"LinearAxis\"}],\"renderers\":[{\"id\":\"18f75ed8-b10a-4f84-9f7e-60e8cd57012d\",\"type\":\"LinearAxis\"},{\"id\":\"94135cde-2dc1-4468-a8b0-07ff4fc430d3\",\"type\":\"Grid\"},{\"id\":\"4c7eab41-d18c-49cb-9fcc-064ba4ef8408\",\"type\":\"LinearAxis\"},{\"
id\":\"b5f485f5-aec8-482a-9b6a-08d5aa72436d\",\"type\":\"Grid\"},{\"id\":\"46bf7f35-7632-4979-a411-48fb606d32d6\",\"type\":\"BoxAnnotation\"},{\"id\":\"58923fb1-8192-4132-bd21-10b0a024cc2b\",\"type\":\"Legend\"},{\"id\":\"9e817dd4-e155-4780-b42b-c312e25d2d26\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"2238b99f-0f6b-41d2-8e3e-d7827c516cbe\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"0cecd3c5-efac-4d80-96c8-18eb26bd9af2\",\"type\":\"Toolbar\"},\"x_range\":{\"id\":\"f22239ec-38de-4d53-a004-fae06efdece0\",\"type\":\"DataRange1d\"},\"x_scale\":{\"id\":\"a5c3b736-a8c4-48ec-be2a-82a906c159cf\",\"type\":\"LinearScale\"},\"y_range\":{\"id\":\"ff003b5b-7456-4af3-8afe-ef6362961cd2\",\"type\":\"DataRange1d\"},\"y_scale\":{\"id\":\"9e585d07-591f-4293-8cd0-8bf51d01fdcb\",\"type\":\"LinearScale\"}},\"id\":\"b13004da-8c8b-4ed7-af8c-6a6e2aedbc1b\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{\"source\":{\"id\":\"efb68a86-0dd7-4ac4-99c3-dcd0eca28f4b\",\"type\":\"ColumnDataSource\"}},\"id\":\"4d49686e-df80-4876-b2e3-c344ba1f53f8\",\"type\":\"CDSView\"},{\"attributes\":{},\"id\":\"b7c09212-7ac5-4626-9c4c-c4cb08164a2e\",\"type\":\"PanTool\"},{\"attributes\":{},\"id\":\"9e585d07-591f-4293-8cd0-8bf51d01fdcb\",\"type\":\"LinearScale\"},{\"attributes\":{},\"id\":\"b57f127f-59ef-48c7-a14c-a769493ba49d\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{},\"id\":\"e334a2a0-1241-453b-84bc-d8edd121f057\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{},\"id\":\"851a6c80-6022-4a49-963f-9d963891d28c\",\"type\":\"WheelZoomTool\"},{\"attributes\":{\"axis_label\":\"Frequency\",\"formatter\":{\"id\":\"b57f127f-59ef-48c7-a14c-a769493ba49d\",\"type\":\"BasicTickFormatter\"},\"plot\":{\"id\":\"b13004da-8c8b-4ed7-af8c-6a6e2aedbc1b\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"fd1460d4-ab3d-478d-968d-3651e0176208\",\"type\":\"BasicTicker\"}},\"id\":\"4c7eab41-d18c-49cb-9fcc-064ba4ef8408\",\"type\":\"LinearAxis\"},{\"attributes\":{\"callback\":null},\"id\":\"ff003b
5b-7456-4af3-8afe-ef6362961cd2\",\"type\":\"DataRange1d\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"b7c09212-7ac5-4626-9c4c-c4cb08164a2e\",\"type\":\"PanTool\"},{\"id\":\"851a6c80-6022-4a49-963f-9d963891d28c\",\"type\":\"WheelZoomTool\"},{\"id\":\"215a9600-fd21-4b5e-a68b-daa0f812ac23\",\"type\":\"BoxZoomTool\"},{\"id\":\"eb65afb6-1770-430a-9283-d05c3cf5a18f\",\"type\":\"SaveTool\"},{\"id\":\"1e037eb8-393d-41f4-89da-7016a3fc227c\",\"type\":\"ResetTool\"},{\"id\":\"ef1b84a9-dbdc-4efd-8ab0-b832eeb851f0\",\"type\":\"HelpTool\"}]},\"id\":\"0cecd3c5-efac-4d80-96c8-18eb26bd9af2\",\"type\":\"Toolbar\"},{\"attributes\":{\"callback\":null,\"data\":{\"left\":[1.0,1.0666666666666667,1.1333333333333333,1.2,1.2666666666666666,1.3333333333333333,1.4,1.4666666666666668,1.5333333333333332,1.6,1.6666666666666665,1.7333333333333334,1.8,1.8666666666666667,1.9333333333333333,2.0,2.0666666666666664,2.1333333333333333,2.2,2.2666666666666666,2.333333333333333,2.4,2.466666666666667,2.533333333333333,2.6,2.666666666666667,2.7333333333333334,2.8,2.8666666666666667,2.9333333333333336],\"right\":[1.0666666666666667,1.1333333333333333,1.2,1.2666666666666666,1.3333333333333333,1.4,1.4666666666666668,1.5333333333333332,1.6,1.6666666666666665,1.7333333333333334,1.8,1.8666666666666667,1.9333333333333333,2.0,2.0666666666666664,2.1333333333333333,2.2,2.2666666666666666,2.333333333333333,2.4,2.466666666666667,2.533333333333333,2.6,2.666666666666667,2.7333333333333334,2.8,2.8666666666666667,2.9333333333333336,3.0],\"top\":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,110236336,35433108,0,0,0,0,0,0,0,0,0,0,0,0,0]},\"selected\":{\"id\":\"4cba8f2b-28fe-47b7-84b8-a098aef4012b\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"def46c49-fe93-4a68-bfe4-ff6da1946d46\",\"type\":\"UnionRenderers\"}},\"id\":\"efb68a86-0dd7-4ac4-99c3-dcd0eca28f4b\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"axis_label\":\"Ti/Tv\",\"fo
rmatter\":{\"id\":\"e334a2a0-1241-453b-84bc-d8edd121f057\",\"type\":\"BasicTickFormatter\"},\"plot\":{\"id\":\"b13004da-8c8b-4ed7-af8c-6a6e2aedbc1b\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"74c434bf-daf0-4e8b-b195-87fef4a17504\",\"type\":\"BasicTicker\"}},\"id\":\"18f75ed8-b10a-4f84-9f7e-60e8cd57012d\",\"type\":\"LinearAxis\"},{\"attributes\":{},\"id\":\"1e037eb8-393d-41f4-89da-7016a3fc227c\",\"type\":\"ResetTool\"},{\"attributes\":{\"bottom_units\":\"screen\",\"fill_alpha\":{\"value\":0.5},\"fill_color\":{\"value\":\"lightgrey\"},\"left_units\":\"screen\",\"level\":\"overlay\",\"line_alpha\":{\"value\":1.0},\"line_color\":{\"value\":\"black\"},\"line_dash\":[4,4],\"line_width\":{\"value\":2},\"plot\":null,\"render_mode\":\"css\",\"right_units\":\"screen\",\"top_units\":\"screen\"},\"id\":\"46bf7f35-7632-4979-a411-48fb606d32d6\",\"type\":\"BoxAnnotation\"},{\"attributes\":{},\"id\":\"def46c49-fe93-4a68-bfe4-ff6da1946d46\",\"type\":\"UnionRenderers\"},{\"attributes\":{\"plot\":{\"id\":\"b13004da-8c8b-4ed7-af8c-6a6e2aedbc1b\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"74c434bf-daf0-4e8b-b195-87fef4a17504\",\"type\":\"BasicTicker\"}},\"id\":\"94135cde-2dc1-4468-a8b0-07ff4fc430d3\",\"type\":\"Grid\"},{\"attributes\":{\"items\":[{\"id\":\"7ce9f7e2-1b77-4ab4-93ee-2209aef3ef13\",\"type\":\"LegendItem\"}],\"plot\":{\"id\":\"b13004da-8c8b-4ed7-af8c-6a6e2aedbc1b\",\"subtype\":\"Figure\",\"type\":\"Plot\"}},\"id\":\"58923fb1-8192-4132-bd21-10b0a024cc2b\",\"type\":\"Legend\"},{\"attributes\":{},\"id\":\"ef1b84a9-dbdc-4efd-8ab0-b832eeb851f0\",\"type\":\"HelpTool\"},{\"attributes\":{\"bottom\":{\"value\":0},\"fill_color\":{\"value\":\"#1f77b4\"},\"left\":{\"field\":\"left\"},\"right\":{\"field\":\"right\"},\"top\":{\"field\":\"top\"}},\"id\":\"98db7989-83d9-4bea-a9c2-a0bc95bd3b5e\",\"type\":\"Quad\"},{\"attributes\":{\"callback\":null},\"id\":\"f22239ec-38de-4d53-a004-fae06efdece0\",\"type\":\"DataRange1d\"},{\"attributes\":{\"overlay\":
{\"id\":\"46bf7f35-7632-4979-a411-48fb606d32d6\",\"type\":\"BoxAnnotation\"}},\"id\":\"215a9600-fd21-4b5e-a68b-daa0f812ac23\",\"type\":\"BoxZoomTool\"},{\"attributes\":{\"data_source\":{\"id\":\"efb68a86-0dd7-4ac4-99c3-dcd0eca28f4b\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"98db7989-83d9-4bea-a9c2-a0bc95bd3b5e\",\"type\":\"Quad\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"05371486-bdd7-4ca6-aa83-1e8092126ab0\",\"type\":\"Quad\"},\"selection_glyph\":null,\"view\":{\"id\":\"4d49686e-df80-4876-b2e3-c344ba1f53f8\",\"type\":\"CDSView\"}},\"id\":\"9e817dd4-e155-4780-b42b-c312e25d2d26\",\"type\":\"GlyphRenderer\"}],\"root_ids\":[\"b13004da-8c8b-4ed7-af8c-6a6e2aedbc1b\"]},\"title\":\"Bokeh Application\",\"version\":\"0.12.16\"}};\n",
       "  var render_items = [{\"docid\":\"d414b0e6-28f6-4936-aae8-a42fef7516ca\",\"elementid\":\"74d8bb7d-2f07-4f51-88a5-3fedcd9d5aee\",\"modelid\":\"b13004da-8c8b-4ed7-af8c-6a6e2aedbc1b\"}];\n",
       "  root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n",
       "\n",
       "  }\n",
       "  if (root.Bokeh !== undefined) {\n",
       "    embed_document(root);\n",
       "  } else {\n",
       "    var attempts = 0;\n",
       "    var timer = setInterval(function(root) {\n",
       "      if (root.Bokeh !== undefined) {\n",
       "        embed_document(root);\n",
       "        clearInterval(timer);\n",
       "      }\n",
       "      attempts++;\n",
       "      if (attempts > 100) {\n",
       "        console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\")\n",
       "        clearInterval(timer);\n",
       "      }\n",
       "    }, 10, root)\n",
       "  }\n",
       "})(window);"
      ],
      "application/vnd.bokehjs_exec.v0+json": ""
     },
     "metadata": {
      "application/vnd.bokehjs_exec.v0+json": {
       "id": "b13004da-8c8b-4ed7-af8c-6a6e2aedbc1b"
      }
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "r_ti_tv_mean = vt.aggregate_cols(agg.mean(vt.sample_qc.r_ti_tv))\n",
    "print(\"Ti/Tv mean=%.3f\" % r_ti_tv_mean)\n",
    "p = hl.plot.histogram(vt.sample_qc.r_ti_tv, range=(1,3), bins=30, title='Ti/Tv Histogram', legend='Ti/Tv')\n",
    "show(p)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Plot a histogram of the ratio of heterozygous vs homozygous variants across samples. We expect this ratio to be approximately 2:1."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Het/hom var mean=2.31\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "<div class=\"bk-root\">\n",
       "    <div class=\"bk-plotdiv\" id=\"c076ec8b-01a5-4fc4-9b60-4c1b3f995fd1\"></div>\n",
       "</div>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/javascript": [
       "(function(root) {\n",
       "  function embed_document(root) {\n",
       "    \n",
       "  var docs_json = {\"0fcb1c21-c0d2-4cd4-a974-adc9459a705b\":{\"roots\":{\"references\":[{\"attributes\":{},\"id\":\"92a9948d-41f4-4067-9f88-7ac60ff04dd1\",\"type\":\"SaveTool\"},{\"attributes\":{},\"id\":\"846c570b-2925-4748-88d7-461eb25c17ce\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{},\"id\":\"e8dbde74-c028-49c4-b6e6-e8252ef95b06\",\"type\":\"ResetTool\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"fcbecad6-ca42-45ac-9cfc-564c57e9e75b\",\"type\":\"PanTool\"},{\"id\":\"4436aa40-b667-4e2f-9f0d-164fd43f8d6c\",\"type\":\"WheelZoomTool\"},{\"id\":\"5e7c94ff-b6b3-4590-95ba-3542d3cf8127\",\"type\":\"BoxZoomTool\"},{\"id\":\"92a9948d-41f4-4067-9f88-7ac60ff04dd1\",\"type\":\"SaveTool\"},{\"id\":\"e8dbde74-c028-49c4-b6e6-e8252ef95b06\",\"type\":\"ResetTool\"},{\"id\":\"bb299cc9-cff7-424e-80a9-94ab2698e0fa\",\"type\":\"HelpTool\"}]},\"id\":\"89852b87-678f-4de3-8039-be6dadecc0d2\",\"type\":\"Toolbar\"},{\"attributes\":{\"label\":{\"value\":\"Ti/Tv\"},\"renderers\":[{\"id\":\"85919d13-30db-4308-80dc-420e01c95ad4\",\"type\":\"GlyphRenderer\"}]},\"id\":\"c60d6089-6a34-4eab-8034-9ac11384ec99\",\"type\":\"LegendItem\"},{\"attributes\":{},\"id\":\"06b6a204-9633-406a-8e9a-103b0c7cbc04\",\"type\":\"LinearScale\"},{\"attributes\":{\"overlay\":{\"id\":\"23a907df-4a87-4c09-bae7-e2ab48d40ff9\",\"type\":\"BoxAnnotation\"}},\"id\":\"5e7c94ff-b6b3-4590-95ba-3542d3cf8127\",\"type\":\"BoxZoomTool\"},{\"attributes\":{\"axis_label\":\"Ti/Tv\",\"formatter\":{\"id\":\"846c570b-2925-4748-88d7-461eb25c17ce\",\"type\":\"BasicTickFormatter\"},\"plot\":{\"id\":\"e6425219-03b8-4943-93c8-b282cbf29038\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"ea43c7c3-ff62-4dc8-9b62-bdb6f183ce11\",\"type\":\"BasicTicker\"}},\"id\":\"6cd966d3-ac42-4ec0-86d7-f6e98124a6f0\",\"type\":\"LinearAxis\"},{\"attributes\":{},\"id\":\"4436aa40-b667-4e2f-9f0d-164fd43f8d6c\",\"type\":\"WheelZoomTool
\"},{\"attributes\":{},\"id\":\"69195649-13d0-4bfa-86e2-7da2c86d83e3\",\"type\":\"UnionRenderers\"},{\"attributes\":{},\"id\":\"4910f2ad-204b-47c8-9a49-aebb1af7675c\",\"type\":\"LinearScale\"},{\"attributes\":{},\"id\":\"fcbecad6-ca42-45ac-9cfc-564c57e9e75b\",\"type\":\"PanTool\"},{\"attributes\":{\"axis_label\":\"Frequency\",\"formatter\":{\"id\":\"04cfcb54-e999-42ad-a4f8-4905053e3cef\",\"type\":\"BasicTickFormatter\"},\"plot\":{\"id\":\"e6425219-03b8-4943-93c8-b282cbf29038\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"2d8315ef-2826-4a18-9927-abe348ca64bd\",\"type\":\"BasicTicker\"}},\"id\":\"a50cb4d7-fb34-4815-91f4-69111b8fd4ff\",\"type\":\"LinearAxis\"},{\"attributes\":{},\"id\":\"262b185c-b17d-418c-9676-7d75ca750f65\",\"type\":\"Selection\"},{\"attributes\":{\"plot\":{\"id\":\"e6425219-03b8-4943-93c8-b282cbf29038\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"ea43c7c3-ff62-4dc8-9b62-bdb6f183ce11\",\"type\":\"BasicTicker\"}},\"id\":\"e650b671-c0f5-4721-8045-054c1ad266ca\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"ea43c7c3-ff62-4dc8-9b62-bdb6f183ce11\",\"type\":\"BasicTicker\"},{\"attributes\":{},\"id\":\"04cfcb54-e999-42ad-a4f8-4905053e3cef\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{\"plot\":null,\"text\":\"Ti/Tv 
Histogram\"},\"id\":\"f069d61b-6382-4a31-b89b-a025129e7e30\",\"type\":\"Title\"},{\"attributes\":{\"bottom\":{\"value\":0},\"fill_color\":{\"value\":\"#1f77b4\"},\"left\":{\"field\":\"left\"},\"right\":{\"field\":\"right\"},\"top\":{\"field\":\"top\"}},\"id\":\"288339d2-5f69-44f2-b93a-9478d2a730f4\",\"type\":\"Quad\"},{\"attributes\":{\"callback\":null},\"id\":\"bff5f59c-3ebe-4db0-ad2c-76c8a755dfce\",\"type\":\"DataRange1d\"},{\"attributes\":{\"dimension\":1,\"plot\":{\"id\":\"e6425219-03b8-4943-93c8-b282cbf29038\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"2d8315ef-2826-4a18-9927-abe348ca64bd\",\"type\":\"BasicTicker\"}},\"id\":\"c7b513ce-38f0-44ef-a63e-390a975d00da\",\"type\":\"Grid\"},{\"attributes\":{\"callback\":null,\"data\":{\"left\":[0.0,0.16666666666666666,0.3333333333333333,0.5,0.6666666666666666,0.8333333333333333,1.0,1.1666666666666665,1.3333333333333333,1.5,1.6666666666666665,1.8333333333333333,2.0,2.1666666666666665,2.333333333333333,2.5,2.6666666666666665,2.833333333333333,3.0,3.1666666666666665,3.333333333333333,3.5,3.6666666666666665,3.833333333333333,4.0,4.166666666666666,4.333333333333333,4.5,4.666666666666666,4.833333333333333],\"right\":[0.16666666666666666,0.3333333333333333,0.5,0.6666666666666666,0.8333333333333333,1.0,1.1666666666666665,1.3333333333333333,1.5,1.6666666666666665,1.8333333333333333,2.0,2.1666666666666665,2.333333333333333,2.5,2.6666666666666665,2.833333333333333,3.0,3.1666666666666665,3.333333333333333,3.5,3.6666666666666665,3.833333333333333,4.0,4.166666666666666,4.333333333333333,4.5,4.666666666666666,4.833333333333333,5.0],\"top\":[0,0,0,0,0,0,0,0,0,0,5905518,29527590,19685060,9842530,30511843,37401614,12795289,0,0,0,0,0,0,0,0,0,0,0,0,0]},\"selected\":{\"id\":\"262b185c-b17d-418c-9676-7d75ca750f65\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"69195649-13d0-4bfa-86e2-7da2c86d83e3\",\"type\":\"UnionRenderers\"}},\"id\":\"086a8b1b-1005-4e23-9bf6-ad2ab9fab687\",\"type\":\"ColumnDataSource\"},{\"att
ributes\":{\"items\":[{\"id\":\"c60d6089-6a34-4eab-8034-9ac11384ec99\",\"type\":\"LegendItem\"}],\"plot\":{\"id\":\"e6425219-03b8-4943-93c8-b282cbf29038\",\"subtype\":\"Figure\",\"type\":\"Plot\"}},\"id\":\"70db2b9d-e39c-4f43-95a3-4c9a55ed9f24\",\"type\":\"Legend\"},{\"attributes\":{\"callback\":null},\"id\":\"7279bd7e-f104-4060-bfc2-fff4cb4ddf4a\",\"type\":\"DataRange1d\"},{\"attributes\":{\"background_fill_color\":{\"value\":\"#EEEEEE\"},\"below\":[{\"id\":\"6cd966d3-ac42-4ec0-86d7-f6e98124a6f0\",\"type\":\"LinearAxis\"}],\"left\":[{\"id\":\"a50cb4d7-fb34-4815-91f4-69111b8fd4ff\",\"type\":\"LinearAxis\"}],\"renderers\":[{\"id\":\"6cd966d3-ac42-4ec0-86d7-f6e98124a6f0\",\"type\":\"LinearAxis\"},{\"id\":\"e650b671-c0f5-4721-8045-054c1ad266ca\",\"type\":\"Grid\"},{\"id\":\"a50cb4d7-fb34-4815-91f4-69111b8fd4ff\",\"type\":\"LinearAxis\"},{\"id\":\"c7b513ce-38f0-44ef-a63e-390a975d00da\",\"type\":\"Grid\"},{\"id\":\"23a907df-4a87-4c09-bae7-e2ab48d40ff9\",\"type\":\"BoxAnnotation\"},{\"id\":\"70db2b9d-e39c-4f43-95a3-4c9a55ed9f24\",\"type\":\"Legend\"},{\"id\":\"85919d13-30db-4308-80dc-420e01c95ad4\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"f069d61b-6382-4a31-b89b-a025129e7e30\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"89852b87-678f-4de3-8039-be6dadecc0d2\",\"type\":\"Toolbar\"},\"x_range\":{\"id\":\"bff5f59c-3ebe-4db0-ad2c-76c8a755dfce\",\"type\":\"DataRange1d\"},\"x_scale\":{\"id\":\"06b6a204-9633-406a-8e9a-103b0c7cbc04\",\"type\":\"LinearScale\"},\"y_range\":{\"id\":\"7279bd7e-f104-4060-bfc2-fff4cb4ddf4a\",\"type\":\"DataRange1d\"},\"y_scale\":{\"id\":\"4910f2ad-204b-47c8-9a49-aebb1af7675c\",\"type\":\"LinearScale\"}},\"id\":\"e6425219-03b8-4943-93c8-b282cbf29038\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{\"data_source\":{\"id\":\"086a8b1b-1005-4e23-9bf6-ad2ab9fab687\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"288339d2-5f69-44f2-b93a-9478d2a730f4\",\"type\":\"Quad\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"i
d\":\"07700fcf-815c-4620-909c-a14c7489c167\",\"type\":\"Quad\"},\"selection_glyph\":null,\"view\":{\"id\":\"77f2900c-e157-4b42-a4cc-9102b3d09692\",\"type\":\"CDSView\"}},\"id\":\"85919d13-30db-4308-80dc-420e01c95ad4\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"bottom_units\":\"screen\",\"fill_alpha\":{\"value\":0.5},\"fill_color\":{\"value\":\"lightgrey\"},\"left_units\":\"screen\",\"level\":\"overlay\",\"line_alpha\":{\"value\":1.0},\"line_color\":{\"value\":\"black\"},\"line_dash\":[4,4],\"line_width\":{\"value\":2},\"plot\":null,\"render_mode\":\"css\",\"right_units\":\"screen\",\"top_units\":\"screen\"},\"id\":\"23a907df-4a87-4c09-bae7-e2ab48d40ff9\",\"type\":\"BoxAnnotation\"},{\"attributes\":{\"bottom\":{\"value\":0},\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"left\":{\"field\":\"left\"},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"right\":{\"field\":\"right\"},\"top\":{\"field\":\"top\"}},\"id\":\"07700fcf-815c-4620-909c-a14c7489c167\",\"type\":\"Quad\"},{\"attributes\":{\"source\":{\"id\":\"086a8b1b-1005-4e23-9bf6-ad2ab9fab687\",\"type\":\"ColumnDataSource\"}},\"id\":\"77f2900c-e157-4b42-a4cc-9102b3d09692\",\"type\":\"CDSView\"},{\"attributes\":{},\"id\":\"bb299cc9-cff7-424e-80a9-94ab2698e0fa\",\"type\":\"HelpTool\"},{\"attributes\":{},\"id\":\"2d8315ef-2826-4a18-9927-abe348ca64bd\",\"type\":\"BasicTicker\"}],\"root_ids\":[\"e6425219-03b8-4943-93c8-b282cbf29038\"]},\"title\":\"Bokeh Application\",\"version\":\"0.12.16\"}};\n",
       "  var render_items = [{\"docid\":\"0fcb1c21-c0d2-4cd4-a974-adc9459a705b\",\"elementid\":\"c076ec8b-01a5-4fc4-9b60-4c1b3f995fd1\",\"modelid\":\"e6425219-03b8-4943-93c8-b282cbf29038\"}];\n",
       "  root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n",
       "\n",
       "  }\n",
       "  if (root.Bokeh !== undefined) {\n",
       "    embed_document(root);\n",
       "  } else {\n",
       "    var attempts = 0;\n",
       "    var timer = setInterval(function(root) {\n",
       "      if (root.Bokeh !== undefined) {\n",
       "        embed_document(root);\n",
       "        clearInterval(timer);\n",
       "      }\n",
       "      attempts++;\n",
       "      if (attempts > 100) {\n",
       "        console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\")\n",
       "        clearInterval(timer);\n",
       "      }\n",
       "    }, 10, root)\n",
       "  }\n",
       "})(window);"
      ],
      "application/vnd.bokehjs_exec.v0+json": ""
     },
     "metadata": {
      "application/vnd.bokehjs_exec.v0+json": {
       "id": "e6425219-03b8-4943-93c8-b282cbf29038"
      }
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "r_het_hom_var_mean = vt.aggregate_cols(agg.mean(vt.sample_qc.r_het_hom_var))\n",
    "print('Het/hom var mean=%.2f' % r_het_hom_var_mean)\n",
    "p = hl.plot.histogram(vt.sample_qc.r_het_hom_var, range=(0,5), bins=30, title='Histogram of Het/Hom ratios', legend='Ti/Tv')\n",
    "show(p)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Finally, we write all variants (with QC annotations) to disk. This is now Hail's internal format which is faster to read from for future analyses."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2018-09-21 11:42:56 Hail: INFO: wrote 984253 items in 20 partitions to polaris.chr20.diplofy.mt\n"
     ]
    }
   ],
   "source": [
    "outfile='polaris.chr20.diplofy.mt'\n",
    "vt.write(outfile,overwrite=True) "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
